FreeSWITCH 停关空识别 Mod 编写样例

最近在为公司的外呼系统做停关空(停机、关机、空号)识别,这里放一个 ASR mod 模板。

可以使用 originate sofia/gateway/xxx/13800000000 start_hd:'10',wait_for_answer,echo inline 来唤起识别,和自己的样本库比较~~(当然识别算法这里没有)~~

另外注意:由于需要录制回铃音,ignore_early_media 必须设置为 false,否则录不到回铃音,后面的识别也就无从谈起。

这里的识别算法是从 Java 版本改写过来的: https://github.com/xdyuchen/AudioScore/blob/master/src/main/java/com/yc/audiodata/AudioDataOperate.java

但是这个版本的算法是不合格的,准确度低,耗时长,计算量大,只是我的一个试验品,所以放出来给大家参考下。

#include <switch.h>
#include <stdio.h>
#include <stdlib.h>
#include <math.h>
#include <string.h>
#include <sys/types.h>
#include <dirent.h>
#include <sys/stat.h>
#include <pthread.h>

/* Pi, obtained from acos(-1); the call is re-evaluated at every use of the macro. */
#define PI acos(-1)
/* Size of the fixed path/name buffers used in this file. */
#define MAX_PATH_LEN (256)

/* Amplitude thresholds used by findDataStartIndex / findDataEndIndex to locate
 * the first and last sample of the useful region of a signal. */
double DATA_START_VALUE = 0.125;
double DATA_END_VALUE = 0.125;

/* Forward declarations for helpers that are defined further down in this file. */
static switch_status_t load_config(void);
/* Thread entry point: the parameter has to be void* so the declaration agrees
 * with the definition and with the pthread start-routine signature. */
static void* initStandardAudioByDir(void *data);
static bool initStandardAudio(char *filePath, char *fileName);
static void filterWave(double audioData[],long size, double b0, double b1);
static long getAudioData(char *filePath, double ** res);
static void normalize(double data[], long size);
static double findMax(double data[], long size);
static long findDataStartIndex(double audioData[], long size);
static long findDataEndIndex(double audioData[], long size);
static long getUsefulData(double audioData[], long size);
static void dealCompareData(double audioData[],long oldSize, long newSize);
static double* shortTimeEnergy(double audioData[], long size);
static double cosineDistance(double standard[], double compare[], long size);

/* Feature-extraction pipelines; both hand a newly allocated buffer back through res. */
static long calculateStandard(char *filePath, double ** res);
static long calculateCompare(char *filePath, double ** res, long newSize);


/* Declare the load/shutdown handlers and bind them to the module definition.
 * The last argument is NULL (presumably the optional runtime handler — confirm
 * against the FreeSWITCH module API). */
SWITCH_MODULE_LOAD_FUNCTION(mod_asr_load);
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_asr_shutdown);
SWITCH_MODULE_DEFINITION(mod_hangup_detect, mod_asr_load, mod_asr_shutdown, NULL);


/*
 * Module-wide state. The configuration fields carry built-in defaults that
 * load_config() overrides from hangup_detect.conf.
 *
 * C does not allow default member initializers inside a struct declaration,
 * so the defaults are supplied through the object's initializer instead.
 */
static struct {
    switch_memory_pool_t *pool;   /* module memory pool, set in mod_asr_load */
    /* configuration values */
    int deleteFileScore;          /* recordings scoring at least this are deleted */
    int maxSampleSec;             /* default capture length in seconds */
    char* pcmDir;                 /* directory that receives the captured .pcm files */
    char* sampleDir;              /* directory holding the reference samples */
} globals = {
    .pool = NULL,
    .deleteFileScore = 90,
    .maxSampleSec = 10,
    .pcmDir = "/data/freeswitch",
    .sampleDir = "/data/freeswitch/sample",
};

/* Per-call capture state, attached to the media bug as its user data. */
typedef struct {

    /* Session being tapped and the media bug attached to it. */
    switch_core_session_t   *session;
    switch_media_bug_t      *bug;
    
    /* Output file receiving the captured PCM, the call UUID, and the path of that file. */
    FILE                    *stream;
    char                    *callUUID;
    char                    *filePath;

    /* stop: set to 1 once capture has finished; the callback then returns SWITCH_FALSE. */
    int                     stop;
    /* totalSample: incremented once per received frame (a frame counter despite the name). */
    int                     totalSample;
    /* sampleRate: 0 until the first frame is seen, then set to 8000. */
    int                     sampleRate;

    /* Capture length in seconds for this call (module default or the app argument). */
    int                     currentMaxSampleSec;

} switch_da_t;


/* One loaded reference sample: its file name and the feature vector computed
 * from it by calculateStandard. */
typedef struct {
    char                    audioName[MAX_PATH_LEN];  /* file name, stored inline */
    long                    size;                     /* length reported by calculateStandard */
    double                  *standardData;            /* heap-allocated feature data */
} standard_audio_t;

/* Global table of loaded reference samples (fixed capacity of 10 slots) and
 * the number of slots currently in use. */
standard_audio_t *standardAudio[10];
int standardAudioSize = 0;


void* initStandardAudioByDir(void *data) {
    char* path = (char *) data;
    DIR *d = NULL;
    struct dirent *dp = NULL; /* readdir函数的返回值就存放在这个结构体中 */
    struct stat st;
    char p[MAX_PATH_LEN] = {0};
    
    if(stat(path, &st) < 0 || !S_ISDIR(st.st_mode)) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "HD mod invalid path: %s\n", path);
        return NULL;
    }

    if(!(d = opendir(path))) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "HD mod opendir[%s] error: %m\n", path);
        return NULL;
    }

    while((dp = readdir(d)) != NULL) {
        /* 把当前目录.,上一级目录..及隐藏文件都去掉,避免死循环遍历目录 */
        if((!strncmp(dp->d_name, ".", 1)) || (!strncmp(dp->d_name, "..", 2)))
            continue;

        snprintf(p, sizeof(p) - 1, "%s/%s", path, dp->d_name);
        stat(p, &st);
        if(!S_ISDIR(st.st_mode)) {
            initStandardAudio(p, dp->d_name);
        }
    }
    closedir(d);

    pthread_exit(NULL);
}

bool initStandardAudio(char *filePath, char *fileName) {
    double *standardData;
    long size = calculateStandard(filePath, &standardData);
    standard_audio_t *sa1 = (standard_audio_t *)malloc(sizeof(standard_audio_t));
    strcpy(sa1->audioName, fileName);
    sa1->size = size;
    sa1->standardData = standardData;
    standardAudio[standardAudioSize] = sa1;
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "initStandardAudio, name:%s, size:%d\n", standardAudio[standardAudioSize]->audioName, standardAudio[standardAudioSize]->size);
    standardAudioSize++;
    
    return true;
}

/**
 * Score a finished recording against every loaded reference sample and
 * publish the best match as a CUSTOM "hangup_detect" event.
 *
 * @param arg  the switch_da_t of the call (void* so the function can serve as
 *             a pthread start routine).
 * @return always NULL.
 *
 * The recording is reduced once with calculateStandard and compared over the
 * shorter of the two lengths for each reference sample. This is faster than
 * re-cutting the recording to every sample's length (calculateCompare) but
 * less accurate.
 *
 * NOTE(review): cosineDistance yields values around [0, 1] while the default
 * deleteFileScore is 90, so with the defaults the recording is never removed
 * — confirm the intended scale.
 * NOTE(review): standardAudio is filled by a loader thread; nothing here
 * synchronizes with it.
 */
void* doCompare(void *arg) {
    switch_da_t *pvt = (switch_da_t *) arg;
    double maxScore = 0;
    int maxScoreIndex = 0;

    double *compareData = NULL;
    long size = calculateStandard(pvt->filePath, &compareData);
    if (compareData != NULL) {
        for(int i=0; i<standardAudioSize; i++) {
            long minSize = size < standardAudio[i]->size ? size : standardAudio[i]->size;
            double score = cosineDistance(standardAudio[i]->standardData, compareData, minSize);
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s comparewith name:%s, score:%f\n", pvt->callUUID, standardAudio[i]->audioName, score);
            if(score > maxScore) {
                maxScore = score;
                maxScoreIndex = i;
            }
        }
    }
    /* The compare buffer is ours to release; the reference data stays loaded. */
    free(compareData);

    if(maxScore >= globals.deleteFileScore) {
        /* Confident match: the temporary recording is no longer needed. */
        if(remove(pvt->filePath) == 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s remove temp file:%s, score:%f\n", pvt->callUUID, pvt->filePath, maxScore);
        }
    }

    switch_event_t *event = NULL;
    if (switch_event_create(&event, SWITCH_EVENT_CUSTOM) == SWITCH_STATUS_SUCCESS) {
        /* With no reference samples loaded there is no entry to name. */
        const char *bestName = standardAudioSize > 0 ? standardAudio[maxScoreIndex]->audioName : "";
        /* "%.4f" needs at least 7 bytes ("0.1234" plus NUL); leave generous room. */
        char scoreStr[32];

        event->subclass_name = strdup("hangup_detect");
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Event-Subclass", event->subclass_name);
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "Call-UUID", pvt->callUUID);
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "HD-Sample-File", bestName);
        snprintf(scoreStr, sizeof(scoreStr), "%.4f", maxScore);
        switch_event_add_header_string(event, SWITCH_STACK_BOTTOM, "HD-Score", scoreStr);
        switch_event_fire(&event);
    }

    /* Plain return instead of pthread_exit: equivalent for a thread start
     * routine, and harmless if the function is ever called directly. */
    return NULL;
}

/**
 * Media-bug callback: records the read side of the call to
 * "<pcmDir>/<uuid>.pcm" and, once the configured duration has elapsed, hands
 * the recording to doCompare on a detached worker thread.
 *
 * @param bug        the media bug delivering the callback.
 * @param user_data  the call's switch_da_t.
 * @param type       callback reason (INIT / CLOSE / READ_REPLACE handled).
 * @return SWITCH_FALSE when there is no state, capture has already stopped,
 *         the output file cannot be set up, or the worker thread cannot be
 *         created; SWITCH_TRUE otherwise.
 *
 * NOTE(review): the worker thread keeps using pvt after this callback has
 * returned; pvt is allocated from the session, so the session presumably has
 * to outlive the comparison — confirm.
 */
static switch_bool_t asr_callback(switch_media_bug_t *bug, void *user_data, switch_abc_type_t type)
{
    switch_da_t *pvt = (switch_da_t *)user_data;

    if(pvt == NULL || pvt->stop == 1)
        return SWITCH_FALSE;

    switch_channel_t *channel = switch_core_session_get_channel(pvt->session);

    switch (type) {
        case SWITCH_ABC_TYPE_INIT:
            {
                /* "/" plus ".pcm" is 5 characters, plus the terminating NUL. */
                size_t pathLen = strlen(globals.pcmDir) + strlen(pvt->callUUID) + 5 + 1;

                pvt->filePath = (char*)malloc(pathLen);
                if (pvt->filePath == NULL) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s HD Start failed: out of memory\n", pvt->callUUID);
                    return SWITCH_FALSE;
                }
                snprintf(pvt->filePath, pathLen, "%s/%s.pcm", globals.pcmDir, pvt->callUUID);

                pvt->stream = fopen(pvt->filePath, "wb");
                if (pvt->stream == NULL) {
                    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s HD Start failed: cannot open file:%s\n", pvt->callUUID, pvt->filePath);
                    free(pvt->filePath);
                    pvt->filePath = NULL;
                    return SWITCH_FALSE;
                }
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s HD Start Succeed channel:%s, file:%s\n",pvt->callUUID, switch_channel_get_name(channel),pvt->filePath);
            }
            break;
        case SWITCH_ABC_TYPE_CLOSE:
            {
                if ( pvt->stream != NULL ) {
                    fclose(pvt->stream);
                    pvt->stream = NULL;
                }
                switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s HD Stop Succeed channel:%s\n",pvt->callUUID, switch_channel_get_name(channel));
            }
            break;

        case SWITCH_ABC_TYPE_READ_REPLACE:
            {
                switch_frame_t *frame;
                if ((frame = switch_core_media_bug_get_read_replace_frame(bug))) {
                    char*frame_data = (char*)frame->data;
                    int frame_len = frame->datalen;
                    /* Hand the frame back untouched; we only tap the audio. */
                    switch_core_media_bug_set_read_replace_frame(bug, frame);

                    /* Frames are assumed to arrive 50 times per second, so the
                     * source rate is samples-per-frame * 50. */
                    long sampleRate = frame->samples * 50;

                    if(pvt->sampleRate == 0) {
                        pvt->sampleRate = 8000;
                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, "%s SWITCH_ABC_TYPE_READ_REPLACE sampleRate before Trans: %ld\n",pvt->callUUID, sampleRate);
                    }

                    if ( pvt->stream != NULL ) {
                        if(sampleRate > 8000) {
                            /* Downsample to 8 kHz by keeping every beilv-th
                             * 16-bit sample, written straight from the frame. */
                            int beilv = sampleRate / 8000;
                            int keptSamples = frame_len / (beilv * 2);
                            for(int i = 0; i < keptSamples; i++) {
                                fwrite(frame_data + (size_t) i * beilv * 2, sizeof(char), 2, pvt->stream);
                            }
                        } else {
                            fwrite(frame_data, sizeof(char), frame_len , pvt->stream);
                        }
                    }

                    pvt->totalSample ++;
                    if(pvt->totalSample > pvt->currentMaxSampleSec * 50) {
                        if ( pvt->stream != NULL ) {
                            fclose(pvt->stream);
                            pvt->stream = NULL;
                        }
                        pvt->stop = 1;

                        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_NOTICE, "%s HD doCompare after %d sec, filePath:%s\n", pvt->callUUID, pvt->currentMaxSampleSec, pvt->filePath);
                        pthread_t tid;
                        int ret = pthread_create(&tid, NULL, doCompare, pvt);
                        if (ret != 0) {
                            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "%s HD doCompare thread create faild, ret: %d\n", pvt->callUUID, ret);
                            return SWITCH_FALSE;
                        }
                        /* Nobody joins the worker, so detach it to release its
                         * resources on exit. The comparison runs only on the
                         * worker; running it here as well would repeat the
                         * work and block the media thread. */
                        pthread_detach(tid);
                    }
                }

            }
            break;
        default: break;
    }
    return SWITCH_TRUE;
}


/*
 * Dialplan application "stop_hd": detaches the capture state stored under the
 * channel private key "asr" and removes its media bug. Does nothing when no
 * capture state is attached to the channel.
 */
SWITCH_STANDARD_APP(stop_asr_session_function)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);
    switch_da_t *pvt = (switch_da_t*)switch_channel_get_private(channel, "asr");

    if (!pvt) {
        return;
    }

    /* Clear the private slot first, then tear down the bug itself. */
    switch_channel_set_private(channel, "asr", NULL);
    switch_core_media_bug_remove(session, &pvt->bug);
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s Stop HD\n", switch_channel_get_name(channel));
}


SWITCH_STANDARD_APP(start_asr_session_function)
{
    switch_channel_t *channel = switch_core_session_get_channel(session);

    switch_status_t status;
    switch_da_t *pvt;
    switch_codec_implementation_t read_impl;
    memset(&read_impl, 0, sizeof(switch_codec_implementation_t));

    char *argv[2] = { 0 };
    int argc;
    char *lbuf = NULL;


    switch_core_session_get_read_impl(session, &read_impl);

    if (!(pvt = (switch_da_t*)switch_core_session_alloc(session, sizeof(switch_da_t)))) {
        return;
    }

    pvt->stop = 0;
    pvt->totalSample = 0;
    pvt->currentMaxSampleSec = globals.maxSampleSec;
    pvt->sampleRate = 0;
    pvt->session = session;
    pvt->callUUID = switch_core_session_get_uuid(session);

    if (!zstr(data) && (lbuf = switch_core_session_strdup(session, data))) {
        pvt->currentMaxSampleSec = atoi(lbuf);
    }

    if ((status = switch_core_media_bug_add(session, "asr", NULL,
        asr_callback, pvt, 0, SMBF_READ_REPLACE | SMBF_NO_PAUSE | SMBF_ONE_ONLY, &(pvt->bug))) != SWITCH_STATUS_SUCCESS) {
        return;
    }

    switch_channel_set_private(channel, "asr", pvt);
    switch_log_printf(SWITCH_CHANNEL_SESSION_LOG(session), SWITCH_LOG_DEBUG, "%s Start HD %s\n", switch_channel_get_name(channel), switch_core_session_get_uuid(session));
}






/*
 * Module load handler: registers the "start_hd" and "stop_hd" applications,
 * reads the configuration and starts a background thread that loads the
 * reference samples from globals.sampleDir.
 *
 * Returns the status of load_config() when it fails, SWITCH_STATUS_FALSE when
 * the loader thread cannot be created, SWITCH_STATUS_SUCCESS otherwise.
 *
 * NOTE(review): the function returns before the loader thread has finished,
 * so calls arriving right after load may see a partially filled sample table.
 */
SWITCH_MODULE_LOAD_FUNCTION(mod_asr_load)
{
    switch_application_interface_t *app_interface;
    globals.pool = pool;

    *module_interface = switch_loadable_module_create_module_interface(globals.pool, modname);

    SWITCH_ADD_APP(app_interface, "start_hd", "start hd", "start hangup detect", start_asr_session_function, "", SAF_MEDIA_TAP);
    SWITCH_ADD_APP(app_interface, "stop_hd", "stop hd", "stop hangup detect", stop_asr_session_function, "", SAF_NONE);

    switch_status_t status = load_config();
    if(SWITCH_STATUS_SUCCESS == status) {

        pthread_t tid;
        int ret = pthread_create(&tid, NULL, initStandardAudioByDir, globals.sampleDir);
        if (ret != 0) {
            switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "HD load thread create faild, ret: %d\n", ret);
            return SWITCH_STATUS_FALSE;
        }
        /* The loader is never joined; detach it so its resources are
         * reclaimed when it finishes. */
        pthread_detach(tid);

        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, " hangup detect loaded ....\n");
        return SWITCH_STATUS_SUCCESS;
    }
    return status;
}


/*
 * Module shutdown handler: releases every loaded reference sample.
 *
 * audioName is an array embedded in standard_audio_t, not a separate heap
 * block, so it must not be passed to free(). What was allocated is the
 * feature buffer and the struct itself.
 */
SWITCH_MODULE_SHUTDOWN_FUNCTION(mod_asr_shutdown)
{
    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_DEBUG, " hangup detect shutdown\n");

    for (int i = 0; i< standardAudioSize; i++) {
        if (standardAudio[i] == NULL) {
            continue;
        }
        free(standardAudio[i]->standardData);
        free(standardAudio[i]);
        standardAudio[i] = NULL;
    }
    /* Leave the table empty so stale pointers cannot be reused. */
    standardAudioSize = 0;

    return SWITCH_STATUS_SUCCESS;
}


/**
 * Read hangup_detect.conf and override the built-in defaults in globals.
 *
 * Recognized <settings> parameters (names compared case-insensitively):
 * pcmDir, sampleDir, deleteFileScore, maxSampleSec. Unknown parameters are
 * ignored. String values are duplicated into the module pool.
 *
 * @return SWITCH_STATUS_SUCCESS, or SWITCH_STATUS_FALSE when the
 *         configuration file cannot be opened.
 */
static switch_status_t load_config(void)
{
    char *cf = "hangup_detect.conf";
    switch_xml_t cfg, xml = NULL, param, settings;
    switch_status_t status = SWITCH_STATUS_SUCCESS;

    switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_INFO, "load config\n");
    if ( ! (xml = switch_xml_open_cfg(cf, &cfg, NULL)) ) {
        switch_log_printf(SWITCH_CHANNEL_LOG, SWITCH_LOG_ERROR, "Open of %s failed\n", cf);
        status = SWITCH_STATUS_FALSE;
        goto done;
    }

    if ( (settings = switch_xml_child(cfg, "settings")) ) {
        for ( param = switch_xml_child(settings, "param"); param; param = param->next ) {
            char *var = (char *) switch_xml_attr_soft(param, "name");
            char *val = (char *) switch_xml_attr_soft(param, "value");
            if ( strcasecmp(var, "pcmDir") == 0 ) {
                globals.pcmDir = switch_core_strdup(globals.pool, val);
            } else if ( strcasecmp(var, "sampleDir") == 0 ) {
                globals.sampleDir = switch_core_strdup(globals.pool, val);
            } else if ( strcasecmp(var, "deleteFileScore") == 0 ) {
                globals.deleteFileScore = atoi(val);
            } else if ( strcasecmp(var, "maxSampleSec") == 0 ) {
                globals.maxSampleSec = atoi(val);
            }
        }
    }

done:
    /* The parsed XML is only needed while copying values out of it. */
    if (xml) {
        switch_xml_free(xml);
    }
    return status;
}



/////////////////////////////////////////////////////


/**
 * Turn a raw PCM file into its normalized short-time-energy curve.
 *
 * Steps: load, normalize, pre-emphasis filter (1, -0.9375), normalize again,
 * trim to the useful region, short-time energy, final normalize.
 *
 * @param filePath  path of the PCM file.
 * @param res       receives the heap-allocated energy curve; the caller frees it.
 * @return the length reported by getUsefulData.
 */
static long calculateStandard(char *filePath, double ** res) {
    long count = getAudioData(filePath, res);
    double *samples = *res;

    /* Scale, filter, then rescale because filtering changes the peak. */
    normalize(samples, count);
    filterWave(samples, count, 1, -0.9375);
    normalize(samples, count);

    /* Keep only the useful part so later stages do not process long audio. */
    count = getUsefulData(samples, count);

    /* shortTimeEnergy releases the sample buffer and returns a new one. */
    double *energy = shortTimeEnergy(samples, count);
    normalize(energy, count);

    *res = energy;
    return count;
}


/**
 * Variant of calculateStandard that cuts the recording to a requested length
 * (starting at the first useful sample) before computing the energy curve.
 *
 * @param filePath  path of the PCM file.
 * @param res       receives the heap-allocated energy curve; the caller frees it.
 * @param newSize   requested length; clamped to the number of samples read.
 * @return the length actually used.
 */
static long calculateCompare(char *filePath, double ** res, long newSize) {
    long totalSize = getAudioData(filePath, res);
    newSize = totalSize < newSize ? totalSize : newSize;

    normalize(*res, totalSize);
    /* Debug dump of the leading samples; bounded so that recordings shorter
     * than 10 samples are not read out of range. */
    for(long i=0;i<10 && i<totalSize;i++) {
           printf("%f ", (*res)[i]);
    }
    printf("\n");

    /* Pre-emphasis filter, then rescale because filtering changes the peak. */
    filterWave(*res, totalSize, 1, -0.9375);
    normalize(*res, totalSize);

    /* Shift the useful data to the front and cut it to newSize. */
    dealCompareData(*res, totalSize, newSize);

    /* shortTimeEnergy releases the sample buffer and returns a new one. */
    double *arr = shortTimeEnergy(*res, newSize);
    normalize(arr, newSize);
    *res = arr;

    return newSize;
}



/**
 * Read a headerless 16-bit PCM file into a newly allocated array of doubles.
 *
 * Each sample is assembled from two bytes with the first byte as the high
 * byte. NOTE(review): that is big-endian order; confirm it matches how the
 * files are produced.
 *
 * @param filePath  path of the PCM file.
 * @param res       receives the allocated buffer; the caller frees it. The
 *                  buffer always holds at least one zeroed element (callers
 *                  inspect element 0 even for empty input) unless allocation
 *                  itself fails, in which case *res is NULL.
 * @return number of samples read; 0 when the file is missing, unreadable or
 *         empty.
 */
static long getAudioData(char *filePath, double ** res) {
    long size = 0;
    FILE *fp = fopen(filePath, "rb");

    if (fp != NULL && fseek(fp, 0L, SEEK_END) == 0) {
        long bytes = ftell(fp);
        if (bytes > 0) {
            size = bytes / 2;   /* two bytes per sample */
        }
        if (fseek(fp, 0L, SEEK_SET) != 0) {
            size = 0;
        }
    }

    *res = (double *)calloc((size_t) (size > 0 ? size : 1), sizeof(double));
    if (*res == NULL) {
        if (fp != NULL) {
            fclose(fp);
        }
        return 0;
    }

    long count = 0;
    unsigned char buf[2] = {0};
    /* fread reports the number of items read and never -1, so stop on a
     * short read as well as on reaching the expected sample count. */
    while (count < size && fread(buf, 1, sizeof(buf), fp) == sizeof(buf)) {
        (*res)[count] = (double) ((short) ((buf[0] << 8) | buf[1]));
        count++;
    }

    if (fp != NULL) {
        fclose(fp);
    }
    return count;
}




/**
 * Scale the data in place so that its largest absolute value becomes 1.
 *
 * Empty input and all-zero input are left untouched; dividing by a zero peak
 * would fill the buffer with NaN.
 *
 * @param data  samples to scale in place.
 * @param size  number of samples.
 */
static void normalize(double data[], long size) {
    if (size <= 0) {
        return;
    }

    double max = findMax(data, size);
    if (max == 0) {
        return;
    }

    for (long i = 0; i < size; i++) {
        data[i] = data[i] / max;
    }
}



/**
 * Return the largest absolute value in the data.
 *
 * @param data  samples to scan.
 * @param size  number of samples; for size <= 0 nothing is read and 0 is
 *              returned.
 * @return the peak magnitude (never negative).
 */
static double findMax(double data[], long size) {
    double max = 0.0;
    for (long i = 0; i < size; i++) {
        double magnitude = fabs(data[i]);
        if (max < magnitude) {
            max = magnitude;
        }
    }
    return max;
}


/**
 * First-order difference filter applied in place:
 * y[i] = b0 * x[i] + b1 * x[i - 1] for i >= 1; element 0 is left unchanged.
 *
 * The array is walked from the end towards the start so that x[i - 1] is
 * still the unfiltered value when element i is computed.
 */
static void filterWave(double audioData[], long size, double b0, double b1) {
    long idx = size;
    while (--idx > 0) {
        audioData[idx] = b0 * audioData[idx] + b1 * audioData[idx - 1];
    }
}


/**
 * Trim the signal to its useful region: the samples from the first one above
 * the start threshold up to (not including) the last one above the end
 * threshold are moved to the front of the array.
 *
 * @return end - start, the length of the retained region.
 */
static long getUsefulData(double audioData[], long size) {
    const long start = findDataStartIndex(audioData, size);
    const long end = findDataEndIndex(audioData, size);
    printf("getUsefulData: oldLen:%ld, start:%ld, end:%ld \n", size, start, end);

    /* Already aligned to the front: nothing has to move. */
    if (start == 0) {
        return end - start;
    }

    double *dst = audioData;
    for (long src = start; src < end; src++) {
        *dst++ = audioData[src];
    }
    return end - start;
}


/**
 * Shift the recording so that it starts at its first useful sample and is cut
 * to newSize samples; positions after the cut are zeroed.
 *
 * When no sample exceeds the start threshold the data is kept from index 0;
 * otherwise the reported start of -1 would make the copy read before the
 * beginning of the array.
 *
 * @param audioData  samples, modified in place.
 * @param oldSize    number of valid samples in audioData.
 * @param newSize    number of samples to keep.
 */
static void dealCompareData(double audioData[],long oldSize, long newSize) {
    long start = findDataStartIndex(audioData, oldSize);
    printf("dealCompareData: oldLen:%ld, start:%ld, newSize:%ld \n", oldSize, start, newSize);

    if (start < 0) {
        start = 0;
    }

    for (long i = 0; i + start < oldSize; i++) {
        /* Copy exactly newSize samples, beginning at the useful start. */
        if (i < newSize) {
            audioData[i] = audioData[i + start];
        } else {
            audioData[i] = 0;
        }
    }
}

/**
 * Scan forward for the first sample strictly greater than DATA_START_VALUE.
 *
 * @return its index, or -1 when no sample qualifies.
 */
static long findDataStartIndex(double audioData[], long size) {
    long idx = 0;
    while (idx < size) {
        if (audioData[idx] > DATA_START_VALUE) {
            return idx;
        }
        idx++;
    }
    return -1;
}

/**
 * Scan backward for the last sample strictly greater than DATA_END_VALUE.
 *
 * @return its index, or -1 when no sample qualifies.
 */
static long findDataEndIndex(double audioData[], long size) {
    long idx = size;
    while (idx-- > 0) {
        if (audioData[idx] > DATA_END_VALUE) {
            return idx;
        }
    }
    return -1;
}

/**
 * Square every element of the array in place (element-wise product of the
 * data with itself).
 */
static void dotProduct(double data[], long size) {
    for (long k = 0; k < size; ++k) {
        const double value = data[k];
        data[k] = value * value;
    }
}


/**
 * Build a Hamming-style window of i * N coefficients:
 * w[j] = 0.54 - 0.46 * cos(2 * PI * j / (i * N)).
 *
 * @return a heap-allocated array of i * N values; the caller frees it.
 */
static double* generateHammingWindows(int N, int i) {
    const int length = i * N;
    double *wins = (double *)malloc(length * sizeof(double));
    int j = 0;

    while (j < length) {
        wins[j] = 0.54 - 0.46 * (cos(2 * PI * j / length));
        j++;
    }
    return wins;
}

/**
 * Full linear convolution of a data segment with a window.
 *
 * For each output index only the terms where both operands exist are summed.
 * The inner loop visits exactly that overlapping range instead of scanning
 * every j <= i and skipping most of them, which cuts the work from roughly
 * (sizeS + sizeO)^2 / 2 iterations to sizeS * sizeO. Terms are still added in
 * ascending j order, so the results are unchanged.
 *
 * @param self   data segment.
 * @param sizeS  number of elements in self.
 * @param other  window function (expected to be much shorter than the data).
 * @param sizeO  number of elements in other.
 * @return a heap-allocated array of sizeS + sizeO - 1 values owned by the
 *         caller, or NULL when allocation fails.
 */
static double* conv(double self[], long sizeS, double other[],long sizeO) {
    const long total = sizeS + sizeO - 1;
    double *result = (double *)malloc((size_t) (total > 0 ? total : 1) * sizeof(double));
    if (result == NULL) {
        return NULL;
    }

    for (long i = 0; i < total; i++) {
        /* j must satisfy 0 <= j < sizeS and 0 <= i - j < sizeO. */
        long first = (i >= sizeO) ? i - sizeO + 1 : 0;
        long last = (i < sizeS) ? i : sizeS - 1;
        double current = 0;

        for (long j = first; j <= last; j++) {
            current += self[j] * other[i - j];
        }
        result[i] = current;
    }
    return result;
}


/**
 * Short-time energy: squares the samples in place and convolves them with a
 * window of 32 * 16 coefficients.
 *
 * Ownership: audioData is freed here; the returned buffer is newly allocated
 * by conv and belongs to the caller.
 */
static double* shortTimeEnergy(double audioData[], long size) {
    enum { WIN_N = 32, WIN_I = 16 };

    dotProduct(audioData, size);

    double *window = generateHammingWindows(WIN_N, WIN_I);
    double *energy = conv(audioData, size, window, WIN_N * WIN_I);

    /* Both inputs are no longer needed once the convolution is done. */
    free(audioData);
    free(window);
    return energy;
}


/**
 * 计算余弦距离 dot(En_compare, En_standard)/(norm(En_compare)*norm(En_standard))
 */
static double cosineDistance(double standard[], double compare[], long size) {
    
    double dot = 0;
    double normStandard = 0;
    double normCompare = 0;
    for (int i = 0; i < size; i++) {
        dot += standard[i] * compare[i];
        normStandard += standard[i] * standard[i];
        normCompare += compare[i] * compare[i];
    }
    double distance = dot / (sqrt(normStandard) * sqrt(normCompare));
    return distance;
}

评论

Your browser is out-of-date!

Update your browser to view this website correctly. Update my browser now

×